

/* This do file prepares the data for the project "Historical Family Types and Female Representation: Persistence and Change"
Prepared by Aina Gallego, Didac Queralt, and Ana Tur-Prats

Structure of the do file:
1. Append information from the councilors datasets, cleaning and recoding relevant variables
2. Add population
3. Add information on voluntary party quotas 
4. Collapse dataset at the municipal and election-year level
5. Add other relevant information (e.g., historical, municipal covariates, etc.)
6. Prepare the dataset for regression (e.g., recoding variables, label definition, sample restriction)
*/


* Start from an empty session and switch off output paging so the script runs unattended
clear all
set more off

* New users change this path:
cd "C:\Users\atur-prats\Desktop\Replication_JoP"


**# 1. Append the councilors databases of each legislature
* One raw file per election year, every four years from 1979 through 2015
forvalues yr = 1979(4)2015 {
	append using "Datasets/00_rawdata/regidors_`yr'.dta"
}

* Order the stacked records by election year and municipality code
sort eyear codiine

* Cleaning and recoding other variables

* Party information
* party2 is a working copy of the numeric party code; once cleaned and labeled
* it replaces both the original code (party) and the acronym (pname)

	generate party2 = party

	* Merge duplicated codes: 65 is folded into 100 and 235 into 234
	recode party2 (65 = 100) (235 = 234)

	* Assign the numeric code from the party acronym (pname). The k-th acronym
	* is paired with the k-th code:
	*   UPL=27, ERC=8, IU=1, PSOE=2, CIU=4, CC (Coalicion Canaria)=3
	local acronyms UPL ERC IU PSOE CIU CC
	local codes    27  8   1  2    4   3
	forvalues k = 1/6 {
		local acr  : word `k' of `acronyms'
		local code : word `k' of `codes'
		replace party2 = `code' if pname == "`acr'"
	}

	* Attach value labels and inspect the resulting distribution
	label define party2 1"IU-ICV" 2"PSOE" 3"COALICION CANARIA" 6"PP" 8"ERC"14"BNG" 
	label values party2 party2
	tabulate party2

* Keep a single party identifier under the original name
	drop pname party
	rename party2 party
	label variable party "Party"

* Number of distinct parties per municipality and election year
* (nvals() comes from the egenmore package; if it is not installed run:)
*ssc inst egenmore
egen numparties = nvals(party), by(codiine eyear)


* Note: before we collapse at the municipal-year level we need to add info on quotas. Before that, we need info on population since quotas depended on population

********************************************************************************

**# 2. Add population, for each election-year

* Attach population by municipality and election year; only councilor records
* with a matching population record are kept
merge m:1 codiine eyear using "Datasets/00_rawdata/pop.dta", keep(match) nogenerate

* Keep only municipalities with more than 250 inhabitants.
* NOTE(review): the original comment justified this with "because they follow a
* closed list PR"; presumably the point is that the smallest municipalities use a
* different electoral rule than the rest -- confirm the wording.
* A missing pop compares as larger than any number in Stata, so such rows are kept.
keep if pop > 250

**# 3.Add information about quotas
* quota starts out missing for every record and is filled in year by year below
gen quota=.

* Some municipalities have missing ccaa, so the information about quotas cannot be attached.
* For those records the region is recovered from the province, which is the leading
* one or two digits of the municipality code (everything before the last three digits).
* Integer arithmetic replaces the former tostring/substr/destring round trip: it yields
* the same prefix for codes of 1000 and above and cannot fail on string conversion.
gen province2 = floor(codiine/1000) if codccaa==.

* Map each province code (1-50) to its autonomous-region code.
* recode applies only the first matching rule to each value, so the rules do not chain.
recode province2 1=16 2=8 3=10 4=1 5=7 6=11 7=4 8=9 9=7 10=11 11=1 12=10 13=8 ///
14=1 15=12 16=8 17=9 18=1 19=8 20=16 21=1 22=2 23=1 24=7 25=9 26=17 27=12 28=13 ///
29=1 30=14 31=15 32=12 33=3 34=7 35=5 36=12 37=7 38=5 39=6 40=7 41=1 42=7 43=9 ///
44=2 45=8 46=10 47=7 48=16 49=7 50=2, gen(codccaa2)

* Fill in the region only where it was missing, then discard the helper variables
replace codccaa=codccaa2 if codccaa2!=. & codccaa==.
drop province2 codccaa2

* Baseline for every election up to 2003: no quota unless a party rule below applies
replace quota = 0 if inlist(eyear, 1979, 1983, 1987, 1991, 1995, 1999, 2003) & quota == .

* 1982: PSOE approves a 12% quota limited to Catalonia region (more details in Appendix M)
replace quota = 12 if eyear == 1983 & party == 2 & codccaa == 9

* 1987: PSOE in Catalonia approves a 15% quota
replace quota = 15 if eyear == 1987 & party == 2 & codccaa == 9

* 1990: IU approves a 35% quota (30% in Catalonia) and PSOE a 25% quota at the national level.
* The same values apply to the 1991 and 1995 elections.
foreach yr in 1991 1995 {
	replace quota = cond(codccaa == 9, 30, 35) if eyear == `yr' & party == 1
	replace quota = 25 if eyear == `yr' & party == 2
}

* 1996: PSOE in Catalonia approves a 30% quota
* 1997: PSOE and IU approve 40% quota
* 1998: BNG (Galicia) approves 40% quota
* In 1999 both IU and PSOE are coded 40, except in Catalonia where both are coded 30
replace quota = cond(codccaa == 9, 30, 40) if eyear == 1999 & inlist(party, 1, 2)
replace quota = 40 if eyear == 1999 & party == 14 & codccaa == 12 // BNG

* 2000: PSC (PSOE in Catalonia) and CC approve 40% quota
replace quota = 40 if eyear == 2003 & inlist(party, 1, 2)
replace quota = 40 if eyear == 2003 & party == 14 & codccaa == 12 // BNG
replace quota = 40 if eyear == 2003 & party == 3 & codccaa == 5 // CC (Coalicion Canaria)

* 2007-2015
* Quotas adopted in municipalities above 5000 inhabitants in 2007 and above 3000 in 2011
* (the 3000 cutoff is also used for 2015).
* 2004: ERC approves 40% quota
* Each pair below is "election year, population cutoff". At or above the cutoff every
* record is coded 40. Below it only parties 1, 2, 3, 14 and 8 (ERC) are coded 40,
* and all remaining records are coded 0.
* NOTE(review): a missing pop satisfies "pop > cutoff - 1" in Stata, so such a record
* would be coded 40 -- confirm that no record reaches this point with missing pop.
foreach rule in "2007 5000" "2011 3000" "2015 3000" {
	gettoken yr cutoff : rule
	replace quota = 40 if eyear == `yr' & pop > `cutoff' - 1 & quota == .
	replace quota = cond(inlist(party, 1, 2, 3, 8, 14), 40, 0) ///
		if eyear == `yr' & pop < `cutoff' & quota == .
}

* Binary version: 0 when quota is 0, 1 when quota lies between 1 and 100
recode quota (0 = 0) (1/100 = 1), generate(quota_binary)


********************************************************************************
* Councilor-level PSOE flag: 1 when party is 2, 0 for any other party code,
* and missing when the party code itself is missing (.)
* Taking the maximum in the collapse below turns it into an indicator for
* municipalities in which PSOE has representation
generate psoe = (party == 2) if party != .

**# 4. Collapse data at the municipal-election(year) level

* Means for the councilor-level and municipality-level variables; maxima for the
* two indicators, so they equal 1 if any councilor in the cell has a 1
collapse (mean) gender cprov numparties codccaa quota pop ///
	(max) quota_binary psoe, by(eyear codiine)

* Large-city indicator: 0 for populations 1-49999, 1 for 50000-10000000
recode pop (1/49999 = 0) (50000/10000000 = 1), generate(bigcity)

**# 5. Add other relevant information
** Information from historical 1860 census on family types and other relevant variables for analysis
* Only municipalities found in both datasets are retained
merge m:1 codiine using "Datasets/00_rawdata/census_1860.dta", keep(match) nogenerate

* Stem family dummies at three alternative thresholds of stem:
*   stem_dummy (1.05), stem_dummy2 (1.02), stem_dummy3 (1.075)
* A value exactly at the threshold matches the first rule and is coded 0;
* values outside 0-2 are left unchanged by recode
local k = 0
foreach cut in 1.05 1.02 1.075 {
	local ++k
	* The first dummy carries no numeric suffix
	local sfx
	if `k' > 1 local sfx `k'
	recode stem (0/`cut' = 0) (`cut'/2 = 1), generate(stem_dummy`sfx')
}

**# 6. Prepare the dataset for regression analysis
* Instrument (descent): 1 for Navarra, Aragon, Catalunya, Valencia, Balearic Island,
* and Vizcaya province (cprov 48); 0 everywhere else
capture drop descent
generate descent = inlist(codccaa, 2, 4, 9, 10, 15) | cprov == 48

* Merge in caloric yield information at the municipal level.
* Rows present only in the controls file are discarded; unmatched rows of the
* working data are kept.
merge m:1 codiine using "Datasets/00_rawdata/municipal_controls.dta", keep(master match) nogenerate

* Municipalities without a coastal flag are treated as non-coastal
replace coastal = 0 if coastal == .

* There is one observation with a wrong codccaa (a value strictly between 14 and 15)
drop if codccaa > 14 & codccaa < 15

* Standardize stem family measure (computed before the regional exclusions below)
egen stem_std = std(stem)

* Express gender on a 0-100 scale
generate genderpc = gender * 100

* Sample restrictions: drop Ceuta and Melilla (Spanish territories in Northern
* Africa; codes 51, 52, 18, 19) and the Canary Islands (code 5)
drop if inlist(codccaa, 51, 52, 18, 19, 5)

* We generate additional controls for Table 2
generate share_ag_pop = ag_pop / population
generate ratio_laborers_landlords = laborers / landlords

* Counts per 1000 inhabitants: share_priests, share_mass_att, share_nuns_monks
foreach v in priests mass_att nuns_monks {
	generate share_`v' = (`v' / population) * 1000
}

generate singles_per_inhab = singles_m / population
generate male_population = population - female_population

* Males per female. The ratio is missing whenever population or female_population
* is missing, because male_population is then missing too
generate sex_ratio = male_population / female_population

* Interaction of the large-city and stem-family dummies; missing if either is missing (.)
generate bigcity_stem = (bigcity == 1 & stem_dummy == 1) if bigcity != . & stem_dummy != .

generate share_migrant = (women_migrant + men_migrant) / population

* Numeric identifier for the district string
encode district, generate(hist_cpj)

generate pop2 = pop^2
rename eyear year

* Label variables 
* Family-type measures
label variable stem "Stem family 1860 census"
label variable stem_std "Stem family 1860 census, standardized"

label define stem_lab 0 "Nuclear" 1 "Stem"
label values stem_dummy stem_lab
tab stem_dummy

* Outcome
label variable genderpc "Share of female councilors"

* Identifiers
label variable codiine "Municipality ID"
label variable hist_cpj "Historical district"
label variable cprov "Province"
label variable codccaa "Autonomous Region"

label variable year "Election year"

* Municipal covariates
label variable pop "Population"
label variable pop2 "Population squared"
label variable bigcity "Indicator for city >50,000 inhabitants"
label variable sup "Municipal area"
label variable coastal "Indicator for coastal municipality"
label variable calories_muni "Caloric suitability"
label variable temperature "Temperature"
label variable precip "Rainfall"
label variable ruggedness "Ruggedness index"

label variable numparties "Number of parties"

* Instrument
label variable descent "Indicator for province with freedom of testation in 13th century"

label variable psoe "PSOE (Partido Socialista Obrero Espanol) party"

label define quota_binary_lab 0 "No voluntary quota" 1 "Voluntary quota"
label values quota_binary quota_binary_lab

* 1860 census variables
label variable sex_ratio "Sex ratio 1860 census"
label variable female_population "Female population 1860 census"
label variable male_population "Male population 1860 census"
label variable population "Total population 1860 census"

* All three shares below are computed as (count/population)*1000, so the label of
* share_mass_att states 1000 (it previously said 100, contradicting the computation)
label variable share_priests "Share priests (multiplied by 1000) 1860 census"
label variable share_nuns_monks "Share monks and nuns (multiplied by 1000) 1860 census"
label variable share_mass_att "Share mass attendants (multiplied by 1000) 1860 census"

label variable ratio_laborers_landlords "Ratio of laborers to landlords 1860 census"
label variable share_ag_pop "Share of the population working in the agricultural sector 1860 census"

label variable singles_per_inhab "Share of single men 1860 census"
label variable share_migrant "Share of internal migrants 1860 census"

* Write the analysis dataset, overwriting any earlier version
save "Datasets/01_processeddata/main_analysis_dataset.dta", replace

